{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6875ece4-37ee-43fb-a146-d5239c6d94f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "bc3578f1-0869-423f-979c-e2efcfa6d301",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import patches\n",
    "import seaborn as sns\n",
    "import numpy as np\n",
    "import warnings\n",
    "import statsmodels.api as sm\n",
    "import statsmodels.formula.api as smf\n",
    "\n",
    "from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
    "import sklearn\n",
    "from sklearn.impute import SimpleImputer"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "63ab92b4-ddf6-44d6-91ff-74bf1a467c36",
   "metadata": {},
   "source": [
    "方差膨胀因子（VIF）是衡量多元线性回归模型中多重共线性的一种度量。VIF的计算公式为:\n",
    "\n",
    "$VIF= \\frac{1}{1-R^2_i}$\n",
    "\n",
    "其中，$R_i$是自变量$X_i$对其余自变量作回归分析的负相关系数。VIF越大，自变量间存在多重共线性的可能性就越高。一般来说，如果VIF大于5，解释变量$X_i$就于其他变量存在高度的多重共线性，参数估计将出现较大的标准差。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8ad18a99-5140-4e32-8cc9-909953725fd4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function that converts categorical values into numerical values via ordinal encoding or one-hot encoding\n",
    "from sklearn.preprocessing import OrdinalEncoder\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "# Function to split data into different groups\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.feature_selection import VarianceThreshold\n",
    "# Statistics functions\n",
    "from scipy.stats import norm\n",
    "from scipy import stats\n",
    "from scipy.stats import chi2_contingency\n",
    "from scipy.stats import chi2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7b69a035-46cd-4a52-be5e-9ba8022f3490",
   "metadata": {},
   "outputs": [],
   "source": [
    "#忽略警告\n",
    "warnings.filterwarnings(\"ignore\") \n",
    "\n",
    "#一个在 jupyter Notebook中展示图像的魔法函数\n",
    "%matplotlib inline \n",
    "\n",
    "#改变seaborn中默认的坐标轴大小\n",
    "sns.set(rc={\"figure.figsize\": (20, 15)})\n",
    "\n",
    "#设置绘图默认的背景模式\n",
    "sns.set_style(\"whitegrid\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5506050c-387b-48d5-9e75-d080469c50d4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((1460, 81), (1459, 80))"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "path = os.path.join('.', 'train.csv')\n",
    "\n",
    "df_train = pd.read_csv(path)\n",
    "df_test = pd.read_csv(os.path.join('.', 'test.csv'))\n",
    "df_train.shape,df_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "57d0f9eb-040e-4c49-a9c0-b1065b92775b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',\n",
       "       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',\n",
       "       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',\n",
       "       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',\n",
       "       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',\n",
       "       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',\n",
       "       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',\n",
       "       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',\n",
       "       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',\n",
       "       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',\n",
       "       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',\n",
       "       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',\n",
       "       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',\n",
       "       'GarageCond', 'PavedDrive', 'WoodDeckSF', 'OpenPorchSF',\n",
       "       'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'PoolQC',\n",
       "       'Fence', 'MiscFeature', 'MiscVal', 'MoSold', 'YrSold', 'SaleType',\n",
       "       'SaleCondition', 'SalePrice'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e1b61cb5-b0f0-47be-8b50-3ce1786ffc22",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1460 entries, 0 to 1459\n",
      "Data columns (total 81 columns):\n",
      " #   Column         Non-Null Count  Dtype  \n",
      "---  ------         --------------  -----  \n",
      " 0   Id             1460 non-null   int64  \n",
      " 1   MSSubClass     1460 non-null   int64  \n",
      " 2   MSZoning       1460 non-null   object \n",
      " 3   LotFrontage    1201 non-null   float64\n",
      " 4   LotArea        1460 non-null   int64  \n",
      " 5   Street         1460 non-null   object \n",
      " 6   Alley          91 non-null     object \n",
      " 7   LotShape       1460 non-null   object \n",
      " 8   LandContour    1460 non-null   object \n",
      " 9   Utilities      1460 non-null   object \n",
      " 10  LotConfig      1460 non-null   object \n",
      " 11  LandSlope      1460 non-null   object \n",
      " 12  Neighborhood   1460 non-null   object \n",
      " 13  Condition1     1460 non-null   object \n",
      " 14  Condition2     1460 non-null   object \n",
      " 15  BldgType       1460 non-null   object \n",
      " 16  HouseStyle     1460 non-null   object \n",
      " 17  OverallQual    1460 non-null   int64  \n",
      " 18  OverallCond    1460 non-null   int64  \n",
      " 19  YearBuilt      1460 non-null   int64  \n",
      " 20  YearRemodAdd   1460 non-null   int64  \n",
      " 21  RoofStyle      1460 non-null   object \n",
      " 22  RoofMatl       1460 non-null   object \n",
      " 23  Exterior1st    1460 non-null   object \n",
      " 24  Exterior2nd    1460 non-null   object \n",
      " 25  MasVnrType     588 non-null    object \n",
      " 26  MasVnrArea     1452 non-null   float64\n",
      " 27  ExterQual      1460 non-null   object \n",
      " 28  ExterCond      1460 non-null   object \n",
      " 29  Foundation     1460 non-null   object \n",
      " 30  BsmtQual       1423 non-null   object \n",
      " 31  BsmtCond       1423 non-null   object \n",
      " 32  BsmtExposure   1422 non-null   object \n",
      " 33  BsmtFinType1   1423 non-null   object \n",
      " 34  BsmtFinSF1     1460 non-null   int64  \n",
      " 35  BsmtFinType2   1422 non-null   object \n",
      " 36  BsmtFinSF2     1460 non-null   int64  \n",
      " 37  BsmtUnfSF      1460 non-null   int64  \n",
      " 38  TotalBsmtSF    1460 non-null   int64  \n",
      " 39  Heating        1460 non-null   object \n",
      " 40  HeatingQC      1460 non-null   object \n",
      " 41  CentralAir     1460 non-null   object \n",
      " 42  Electrical     1459 non-null   object \n",
      " 43  1stFlrSF       1460 non-null   int64  \n",
      " 44  2ndFlrSF       1460 non-null   int64  \n",
      " 45  LowQualFinSF   1460 non-null   int64  \n",
      " 46  GrLivArea      1460 non-null   int64  \n",
      " 47  BsmtFullBath   1460 non-null   int64  \n",
      " 48  BsmtHalfBath   1460 non-null   int64  \n",
      " 49  FullBath       1460 non-null   int64  \n",
      " 50  HalfBath       1460 non-null   int64  \n",
      " 51  BedroomAbvGr   1460 non-null   int64  \n",
      " 52  KitchenAbvGr   1460 non-null   int64  \n",
      " 53  KitchenQual    1460 non-null   object \n",
      " 54  TotRmsAbvGrd   1460 non-null   int64  \n",
      " 55  Functional     1460 non-null   object \n",
      " 56  Fireplaces     1460 non-null   int64  \n",
      " 57  FireplaceQu    770 non-null    object \n",
      " 58  GarageType     1379 non-null   object \n",
      " 59  GarageYrBlt    1379 non-null   float64\n",
      " 60  GarageFinish   1379 non-null   object \n",
      " 61  GarageCars     1460 non-null   int64  \n",
      " 62  GarageArea     1460 non-null   int64  \n",
      " 63  GarageQual     1379 non-null   object \n",
      " 64  GarageCond     1379 non-null   object \n",
      " 65  PavedDrive     1460 non-null   object \n",
      " 66  WoodDeckSF     1460 non-null   int64  \n",
      " 67  OpenPorchSF    1460 non-null   int64  \n",
      " 68  EnclosedPorch  1460 non-null   int64  \n",
      " 69  3SsnPorch      1460 non-null   int64  \n",
      " 70  ScreenPorch    1460 non-null   int64  \n",
      " 71  PoolArea       1460 non-null   int64  \n",
      " 72  PoolQC         7 non-null      object \n",
      " 73  Fence          281 non-null    object \n",
      " 74  MiscFeature    54 non-null     object \n",
      " 75  MiscVal        1460 non-null   int64  \n",
      " 76  MoSold         1460 non-null   int64  \n",
      " 77  YrSold         1460 non-null   int64  \n",
      " 78  SaleType       1460 non-null   object \n",
      " 79  SaleCondition  1460 non-null   object \n",
      " 80  SalePrice      1460 non-null   int64  \n",
      "dtypes: float64(3), int64(35), object(43)\n",
      "memory usage: 924.0+ KB\n"
     ]
    }
   ],
   "source": [
    "df_train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "011aeca4-ccdc-4afa-a5b5-7fe10e14cd55",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_train.drop(['Id'], axis=1, inplace=True)\n",
    "Id_test_list = df_test[\"Id\"].tolist()\n",
    "df_test.drop(['Id'], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "72a16482-d945-47fc-a193-79d50959de25",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>OverallQual</th>\n",
       "      <th>OverallCond</th>\n",
       "      <th>YearBuilt</th>\n",
       "      <th>YearRemodAdd</th>\n",
       "      <th>MasVnrArea</th>\n",
       "      <th>BsmtFinSF1</th>\n",
       "      <th>BsmtFinSF2</th>\n",
       "      <th>...</th>\n",
       "      <th>WoodDeckSF</th>\n",
       "      <th>OpenPorchSF</th>\n",
       "      <th>EnclosedPorch</th>\n",
       "      <th>3SsnPorch</th>\n",
       "      <th>ScreenPorch</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SalePrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>60</td>\n",
       "      <td>65.0</td>\n",
       "      <td>8450</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>2003</td>\n",
       "      <td>2003</td>\n",
       "      <td>196.0</td>\n",
       "      <td>706</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>61</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2008</td>\n",
       "      <td>208500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20</td>\n",
       "      <td>80.0</td>\n",
       "      <td>9600</td>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>1976</td>\n",
       "      <td>1976</td>\n",
       "      <td>0.0</td>\n",
       "      <td>978</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>298</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2007</td>\n",
       "      <td>181500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>60</td>\n",
       "      <td>68.0</td>\n",
       "      <td>11250</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>2001</td>\n",
       "      <td>2002</td>\n",
       "      <td>162.0</td>\n",
       "      <td>486</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>42</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2008</td>\n",
       "      <td>223500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>70</td>\n",
       "      <td>60.0</td>\n",
       "      <td>9550</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>1915</td>\n",
       "      <td>1970</td>\n",
       "      <td>0.0</td>\n",
       "      <td>216</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>35</td>\n",
       "      <td>272</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2006</td>\n",
       "      <td>140000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>60</td>\n",
       "      <td>84.0</td>\n",
       "      <td>14260</td>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>2000</td>\n",
       "      <td>2000</td>\n",
       "      <td>350.0</td>\n",
       "      <td>655</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>192</td>\n",
       "      <td>84</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>2008</td>\n",
       "      <td>250000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 37 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   MSSubClass  LotFrontage  LotArea  OverallQual  OverallCond  YearBuilt  \\\n",
       "0          60         65.0     8450            7            5       2003   \n",
       "1          20         80.0     9600            6            8       1976   \n",
       "2          60         68.0    11250            7            5       2001   \n",
       "3          70         60.0     9550            7            5       1915   \n",
       "4          60         84.0    14260            8            5       2000   \n",
       "\n",
       "   YearRemodAdd  MasVnrArea  BsmtFinSF1  BsmtFinSF2  ...  WoodDeckSF  \\\n",
       "0          2003       196.0         706           0  ...           0   \n",
       "1          1976         0.0         978           0  ...         298   \n",
       "2          2002       162.0         486           0  ...           0   \n",
       "3          1970         0.0         216           0  ...           0   \n",
       "4          2000       350.0         655           0  ...         192   \n",
       "\n",
       "   OpenPorchSF  EnclosedPorch  3SsnPorch  ScreenPorch  PoolArea  MiscVal  \\\n",
       "0           61              0          0            0         0        0   \n",
       "1            0              0          0            0         0        0   \n",
       "2           42              0          0            0         0        0   \n",
       "3           35            272          0            0         0        0   \n",
       "4           84              0          0            0         0        0   \n",
       "\n",
       "   MoSold  YrSold  SalePrice  \n",
       "0       2    2008     208500  \n",
       "1       5    2007     181500  \n",
       "2       9    2008     223500  \n",
       "3       2    2006     140000  \n",
       "4      12    2008     250000  \n",
       "\n",
       "[5 rows x 37 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#提取数值特征\n",
    "df_train_num = df_train.select_dtypes(include=[np.number])\n",
    "df_train_num.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "9a72d74e-9882-4be3-88b7-ffd005124f77",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>OverallQual</th>\n",
       "      <th>OverallCond</th>\n",
       "      <th>YearBuilt</th>\n",
       "      <th>YearRemodAdd</th>\n",
       "      <th>MasVnrArea</th>\n",
       "      <th>BsmtFinSF1</th>\n",
       "      <th>BsmtFinSF2</th>\n",
       "      <th>...</th>\n",
       "      <th>GarageArea</th>\n",
       "      <th>WoodDeckSF</th>\n",
       "      <th>OpenPorchSF</th>\n",
       "      <th>EnclosedPorch</th>\n",
       "      <th>3SsnPorch</th>\n",
       "      <th>ScreenPorch</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20</td>\n",
       "      <td>80.0</td>\n",
       "      <td>11622</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>1961</td>\n",
       "      <td>1961</td>\n",
       "      <td>0.0</td>\n",
       "      <td>468.0</td>\n",
       "      <td>144.0</td>\n",
       "      <td>...</td>\n",
       "      <td>730.0</td>\n",
       "      <td>140</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>120</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20</td>\n",
       "      <td>81.0</td>\n",
       "      <td>14267</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>1958</td>\n",
       "      <td>1958</td>\n",
       "      <td>108.0</td>\n",
       "      <td>923.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>312.0</td>\n",
       "      <td>393</td>\n",
       "      <td>36</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12500</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>60</td>\n",
       "      <td>74.0</td>\n",
       "      <td>13830</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>1997</td>\n",
       "      <td>1998</td>\n",
       "      <td>0.0</td>\n",
       "      <td>791.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>482.0</td>\n",
       "      <td>212</td>\n",
       "      <td>34</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>60</td>\n",
       "      <td>78.0</td>\n",
       "      <td>9978</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>1998</td>\n",
       "      <td>1998</td>\n",
       "      <td>20.0</td>\n",
       "      <td>602.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>470.0</td>\n",
       "      <td>360</td>\n",
       "      <td>36</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>120</td>\n",
       "      <td>43.0</td>\n",
       "      <td>5005</td>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>1992</td>\n",
       "      <td>1992</td>\n",
       "      <td>0.0</td>\n",
       "      <td>263.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>506.0</td>\n",
       "      <td>0</td>\n",
       "      <td>82</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>144</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 36 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   MSSubClass  LotFrontage  LotArea  OverallQual  OverallCond  YearBuilt  \\\n",
       "0          20         80.0    11622            5            6       1961   \n",
       "1          20         81.0    14267            6            6       1958   \n",
       "2          60         74.0    13830            5            5       1997   \n",
       "3          60         78.0     9978            6            6       1998   \n",
       "4         120         43.0     5005            8            5       1992   \n",
       "\n",
       "   YearRemodAdd  MasVnrArea  BsmtFinSF1  BsmtFinSF2  ...  GarageArea  \\\n",
       "0          1961         0.0       468.0       144.0  ...       730.0   \n",
       "1          1958       108.0       923.0         0.0  ...       312.0   \n",
       "2          1998         0.0       791.0         0.0  ...       482.0   \n",
       "3          1998        20.0       602.0         0.0  ...       470.0   \n",
       "4          1992         0.0       263.0         0.0  ...       506.0   \n",
       "\n",
       "   WoodDeckSF  OpenPorchSF  EnclosedPorch  3SsnPorch  ScreenPorch  PoolArea  \\\n",
       "0         140            0              0          0          120         0   \n",
       "1         393           36              0          0            0         0   \n",
       "2         212           34              0          0            0         0   \n",
       "3         360           36              0          0            0         0   \n",
       "4           0           82              0          0          144         0   \n",
       "\n",
       "   MiscVal  MoSold  YrSold  \n",
       "0        0       6    2010  \n",
       "1    12500       6    2010  \n",
       "2        0       3    2010  \n",
       "3        0       6    2010  \n",
       "4        0       1    2010  \n",
       "\n",
       "[5 rows x 36 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test_num = df_test.select_dtypes(include=[np.number])\n",
    "df_test_num.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "37d6acbc-c182-491b-9965-4a2c0a38519d",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
